package com.aspose.pdf.examples.AsposePdfExamples.DocumentObject;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Scanner;

import javax.imageio.ImageIO;

import com.aspose.pdf.Document;
import com.aspose.pdf.Document.CallBackGetHocr;

/**
 * Example: converts an image-only (non-searchable) PDF into a searchable PDF.
 * <p>
 * The document's {@code convert} method is given a {@link CallBackGetHocr} callback.
 * For every image handed to the callback, the image is written to a temporary
 * JPEG file, the external {@code tesseract} command-line tool is run on it with the
 * {@code hocr} config, and the resulting hOCR markup is returned as a string.
 * <p>
 * Requirements: the {@code tesseract} executable must be on the PATH, and
 * {@code myDir} must point to a writable directory (including a trailing path
 * separator, since file names are appended to it directly).
 */
public class ConvertingNonSearchablePDFToSearchablePDFDocument {

	public static void main(String[] args) {
		final String myDir = "PathToDir";
		Document doc = new Document(myDir + "outFile.pdf");
		// Create the callback that recognizes text in the PDF's images. Any external OCR
		// engine that supports the hOCR standard (http://en.wikipedia.org/wiki/HOCR) can be used.
		// This example uses the free Tesseract OCR engine
		// (http://en.wikipedia.org/wiki/Tesseract_%28software%29).
		CallBackGetHocr cbgh = new CallBackGetHocr() {
			/**
			 * Runs OCR on a single page image.
			 *
			 * @param img the image to recognize
			 * @return the hOCR markup produced by Tesseract, or an empty string if the
			 *         image could not be written or OCR did not produce any output
			 */
			@Override
			public String invoke(java.awt.image.BufferedImage img) {
				final String imagePath = myDir + "test.jpg";
				final String outputBase = myDir + "out";
				File imageFile = new File(imagePath);
				// Older Tesseract releases write <base>.html for the hocr config, newer ones
				// are expected to write <base>.hocr; both are handled here.
				File htmlOutput = new File(outputBase + ".html");
				File hocrOutput = new File(outputBase + ".hocr");

				// Remove leftovers from earlier runs so a stale file is never mistaken
				// for the result of this invocation.
				deleteIfExists(htmlOutput);
				deleteIfExists(hocrOutput);
				try {
					// ImageIO.write returns false (without throwing) when no writer can encode the image.
					if (!ImageIO.write(img, "jpg", imageFile)) {
						System.err.println("No image writer available to save " + imagePath + " as jpg");
						return "";
					}
					if (!runTesseract(imagePath, outputBase)) {
						return "";
					}
					File result = hocrOutput.exists() ? hocrOutput : htmlOutput;
					return readFile(result);
				} catch (IOException e) {
					e.printStackTrace();
					return "";
				} finally {
					// Always delete the temp files, even when OCR failed part-way.
					deleteIfExists(htmlOutput);
					deleteIfExists(hocrOutput);
					deleteIfExists(imageFile);
				}
			}
		};
		// End callBack

		doc.convert(cbgh);
		doc.save(myDir + "output971.pdf");
	}

	/**
	 * Runs {@code tesseract <imagePath> <outputBase> hocr} and waits for it to finish.
	 * Arguments are passed as a list so that paths containing spaces are not split.
	 *
	 * @param imagePath  path of the image to recognize
	 * @param outputBase output path without extension (Tesseract appends the extension)
	 * @return {@code true} if the process ran to completion (regardless of exit code),
	 *         {@code false} if it could not be started or the wait was interrupted
	 */
	private static boolean runTesseract(String imagePath, String outputBase) {
		ProcessBuilder builder = new ProcessBuilder("tesseract", imagePath, outputBase, "hocr");
		// Merge stderr into stdout so a single drain loop keeps the child from blocking
		// on a full pipe buffer.
		builder.redirectErrorStream(true);
		java.lang.Process process = null;
		try {
			process = builder.start();
			System.out.println("tesseract" + " " + imagePath + " " + outputBase + " hocr");
			discardOutput(process.getInputStream());
			int exitCode = process.waitFor();
			if (exitCode != 0) {
				System.err.println("tesseract exited with code " + exitCode);
			}
			return true;
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag so callers further up can observe it.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
		if (process != null) {
			process.destroy();
		}
		return false;
	}

	/**
	 * Reads and discards everything from the given stream until end of stream, then closes it.
	 *
	 * @param stream the stream to drain
	 * @throws IOException if reading fails
	 */
	private static void discardOutput(java.io.InputStream stream) throws IOException {
		try {
			byte[] buffer = new byte[4096];
			while (stream.read(buffer) != -1) {
				// output is intentionally ignored
			}
		} finally {
			stream.close();
		}
	}

	/**
	 * Reads a text file as UTF-8, normalizing every line ending to the platform line separator.
	 *
	 * @param file the file to read
	 * @return the file contents, or an empty string if the file does not exist
	 */
	private static String readFile(File file) {
		StringBuilder fileContents = new StringBuilder((int) file.length());
		Scanner scanner = null;
		try {
			// Read as UTF-8 explicitly instead of relying on the platform default charset.
			scanner = new Scanner(file, "UTF-8");
			String lineSeparator = System.getProperty("line.separator");
			while (scanner.hasNextLine()) {
				fileContents.append(scanner.nextLine()).append(lineSeparator);
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} finally {
			if (scanner != null) {
				scanner.close();
			}
		}
		return fileContents.toString();
	}

	/**
	 * Deletes the file if it exists, reporting (but not failing on) an unsuccessful delete.
	 *
	 * @param file the file to delete
	 */
	private static void deleteIfExists(File file) {
		if (file.exists() && !file.delete()) {
			System.err.println("Could not delete temporary file " + file.getPath());
		}
	}
}